# Replication Archive for 
# Alexander Coppock, Emily Ekins and David Kirby (2018), 
# "The Long-lasting Effects of Newspaper Op-Eds on Public Opinion",
# Quarterly Journal of Political Science: Vol. 13: No. 1, pp 59-87.

rm(list = ls())

# Uncomment to install
#install.packages(c("tidyverse", "xtable", "nnet"))
#devtools::install_github("acoppock/commarobust")

library(tidyverse)
library(xtable)
library(nnet)

# Set working directory to replication archive

load("mturk_opeds_cleaned.rdata")
load("elite_opeds_cleaned.rdata")
load("dv_df.rdata")
source("opeds_source.R")

# Balance and Demographics ------------------------------------------------

age_mturk <- 
  mturk_opeds %$%
  table(age_5, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = c("18 - 29", "30 - 39", "40 - 49", "50 - 59", "60+"))) %>%
  arrange(rowname)

educ_mturk <- 
  mturk_opeds %$%
  table(educ_5, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = c("Less than High School", "High School", "Some College", "College", "Graduate School"))) %>%
  arrange(rowname)

race_mturk <- 
  mturk_opeds %$%
  table(race_4, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = c("Black", "Hispanic", "White", "Other"))) %>%
  arrange(rowname)

pid_mturk <- 
  mturk_opeds %$%
  table(pid_7, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = 1:7,
                          labels = c("Strong Democrat",
                                     "Not very strong Democrat",
                                     "Lean Democrat",
                                     "Independent",
                                     "Lean Republican",
                                     "Not very strong Republican",
                                     "Strong Republican"))) %>%
  arrange(rowname)

ideo_mturk <- 
  mturk_opeds %$%
  table(ideo_5, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = c("Liberal", "Moderate", "Libertarian", "Conservative", "Other"))) %>%
  arrange(rowname)

female_mturk <- 
  mturk_opeds %$%
  table(female, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = c(0, 1), labels = c("Male", "Female"))) %>%
  arrange(rowname)

n_mturk <- 
  mturk_opeds %$% 
  table(Z) %>%
  as.matrix(.) %>% t %>%
  data.frame() %>%
  rownames_to_column() %>%
  mutate(rowname = "N")



xtable(age_mturk, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(educ_mturk, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(race_mturk, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(pid_mturk, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(ideo_mturk, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(female_mturk, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(n_mturk) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())



age_elite <- 
  elite_opeds %$%
  table(age_5, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = c("18 - 29", "30 - 39", "40 - 49", "50 - 59", "60+"))) %>%
  arrange(rowname)

educ_elite <- 
  elite_opeds %$%
  table(educ_5, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = c("Less than High School", "High School", "Some College", "College", "Graduate School"))) %>%
  arrange(rowname)

race_elite <- 
  elite_opeds %$%
  table(race_4, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = c("Black", "Hispanic", "White", "Other"))) %>%
  arrange(rowname)

pid_elite <- 
  elite_opeds %$%
  table(pid_7, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = 1:7,
                          labels = c("Strong Democrat",
                                     "Not very strong Democrat",
                                     "Lean Democrat",
                                     "Independent",
                                     "Lean Republican",
                                     "Not very strong Republican",
                                     "Strong Republican"))) %>%
  arrange(rowname)


ideo_elite <- 
  elite_opeds %$%
  table(ideo_5, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = c("Liberal", "Moderate", "Libertarian", "Conservative", "Other"))) %>%
  arrange(rowname)

female_elite <- 
  elite_opeds %$%
  table(female, Z) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.matrix(.) %>%
  rownames_to_column() %>%
  mutate(rowname = factor(rowname, levels = c(0, 1), labels = c("Male", "Female"))) %>%
  arrange(rowname)


n_elite <- 
  elite_opeds %$% 
  table(Z) %>%
  as.matrix(.) %>% t %>%
  data.frame() %>%
  rownames_to_column() %>%
  mutate(rowname = "N")

xtable(age_elite, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(educ_elite, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(race_elite, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(pid_elite, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(ideo_elite, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(female_elite, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())
xtable(n_elite, digits = 3) %>% print.xtable(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE, hline.after = c())


# Compare Elite and Mturk samples -----------------------------------------

opeds_all <- bind_rows(mturk_opeds %>% select(age_5, educ_5, race_4, pid_7, ideo_5, female) %>% mutate(version = "mturk"),
                       elite_opeds %>% select(age_5, educ_5, race_4, pid_7, ideo_5, female) %>% mutate(version = "elite"))


opeds_all %$% table(age_5, version) %>% chisq.test(.)
opeds_all %$% table(educ_5, version) %>% chisq.test(.)
opeds_all %$% table(race_4, version) %>% chisq.test(.)
opeds_all %$% table(pid_7, version) %>% chisq.test(.)
opeds_all %$% table(ideo_5, version) %>% chisq.test(.)
opeds_all %$% table(female, version) %>% chisq.test(.)


# RI for Balance Test -----------------------------------------------------

formula_u <- "Z ~ as.factor(age_5) + as.factor(educ_5) + as.factor(race_4) + as.factor(ideo_5) + as.factor(female) + pid_7"
formula_r <- "Z~1"

fit.u <- multinom(formula_u, data = mturk_opeds)
fit.r <- multinom(formula_r, data = mturk_opeds)
llr_mturk_obs <- fit.r$deviance - fit.u$deviance

fit.u <- multinom(formula_u, data = elite_opeds)
fit.r <- multinom(formula_r, data = elite_opeds)
llr_elite_obs <- fit.r$deviance - fit.u$deviance

# This sim can take a while
set.seed(343)
# sims <- 1000
sims <- 50
llr_mturk_sim <- llr_elite_sim <- rep(NA, sims)
for (i in 1:sims) {
  mturk_opeds_sim <-
    mutate(mturk_opeds, Z = sample(unique(Z), size = length(Z), replace = TRUE))
  fit.u <- multinom(formula_u, data = mturk_opeds_sim)
  fit.r <- multinom(formula_r, data = mturk_opeds_sim)
  llr_mturk_sim[i] <- fit.r$deviance - fit.u$deviance
  
  elite_opeds_sim <-
    mutate(elite_opeds, Z = sample(unique(Z), size = length(Z), replace = TRUE))
  fit.u <- multinom(formula_u, data = elite_opeds_sim)
  fit.r <- multinom(formula_r, data = elite_opeds_sim)
  llr_elite_sim[i] <- fit.r$deviance - fit.u$deviance
}

label_df <- 
  tibble(
    `Observed Test Statistic` = c(llr_mturk_obs, llr_elite_obs),
    `Experimental Sample` = c("MTurk", "Elite")
  )

df <- tibble(
  `Simulated Test Statistic` = c(llr_mturk_sim, llr_elite_sim),
  `Experimental Sample` = rep(c("MTurk", "Elite"), c(sims, sims))
)

g <- 
ggplot(df, aes(x = `Simulated Test Statistic`)) +
  geom_histogram() +
  geom_vline(data = label_df, aes(xintercept = `Observed Test Statistic`),
             color = "red", linetype = "dashed") +
  facet_wrap(~`Experimental Sample`, scales = "free") +
  theme_bw() +
  theme(axis.title.y = element_blank(),
        strip.background = element_blank())

g

p_elite <- mean(llr_elite_sim >= llr_elite_obs)
p_elite
p_mturk <- mean(llr_mturk_sim >= llr_mturk_obs)
p_mturk


# What kind of elite? -----------------------------------------------------
elite_opeds <- 
  within(elite_opeds,{
    elite_type[is.na(elite_type)] <- "Other"
  })
table(elite_opeds$elite_type, exclude = NULL)


elite_types <-
  elite_opeds %>%
  group_by(elite_type) %>%
  summarize(Number = n(),
            Percent = 100 * Number / 2181) %>%
  arrange(-Number)

print(xtable(elite_types), hline.after = c(), 
      include.rownames = FALSE,
      include.colnames = FALSE, only.contents = TRUE)





